In [2]:
# Import các thư viện cần thiết
In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
In [4]:
# Đọc dữ liệu
In [5]:
train = pd.read_csv('adult.data.csv')
test = pd.read_csv('adult.test.csv')
In [6]:
# Kiểm tra
In [7]:
print(train)
       age          workclass  fnlwgt    education  education-num  \
0       39          State-gov   77516    Bachelors             13   
1       50   Self-emp-not-inc   83311    Bachelors             13   
2       38            Private  215646      HS-grad              9   
3       53            Private  234721         11th              7   
4       28            Private  338409    Bachelors             13   
...    ...                ...     ...          ...            ...   
32556   27            Private  257302   Assoc-acdm             12   
32557   40            Private  154374      HS-grad              9   
32558   58            Private  151910      HS-grad              9   
32559   22            Private  201490      HS-grad              9   
32560   52       Self-emp-inc  287927      HS-grad              9   

            marital-status          occupation    relationship    race  \
0            Never-married        Adm-clerical   Not-in-family   White   
1       Married-civ-spouse     Exec-managerial         Husband   White   
2                 Divorced   Handlers-cleaners   Not-in-family   White   
3       Married-civ-spouse   Handlers-cleaners         Husband   Black   
4       Married-civ-spouse      Prof-specialty            Wife   Black   
...                    ...                 ...             ...     ...   
32556   Married-civ-spouse        Tech-support            Wife   White   
32557   Married-civ-spouse   Machine-op-inspct         Husband   White   
32558              Widowed        Adm-clerical       Unmarried   White   
32559        Never-married        Adm-clerical       Own-child   White   
32560   Married-civ-spouse     Exec-managerial            Wife   White   

           sex  capital-gain  capital-loss  hours-per-week  native-country  \
0         Male          2174             0              40   United-States   
1         Male             0             0              13   United-States   
2         Male             0             0              40   United-States   
3         Male             0             0              40   United-States   
4       Female             0             0              40            Cuba   
...        ...           ...           ...             ...             ...   
32556   Female             0             0              38   United-States   
32557     Male             0             0              40   United-States   
32558   Female             0             0              40   United-States   
32559     Male             0             0              20   United-States   
32560   Female         15024             0              40   United-States   

       income  
0       <=50K  
1       <=50K  
2       <=50K  
3       <=50K  
4       <=50K  
...       ...  
32556   <=50K  
32557    >50K  
32558   <=50K  
32559   <=50K  
32560    >50K  

[32561 rows x 15 columns]
In [8]:
# Tiến hành tiền xử lý dữ liệu
# Xóa những dòng có chứa dữ liệu trống, biết rằng dữ liệu trống được ký hiệu bằng dấu ‘?’.
In [9]:
train.replace(' ?',np.nan,inplace=True)
In [10]:
train=train.dropna()
In [11]:
train
Out[11]:
age workclass fnlwgt education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country income
0 39 State-gov 77516 Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private 215646 HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 234721 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private 338409 Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
32556 27 Private 257302 Assoc-acdm 12 Married-civ-spouse Tech-support Wife White Female 0 0 38 United-States <=50K
32557 40 Private 154374 HS-grad 9 Married-civ-spouse Machine-op-inspct Husband White Male 0 0 40 United-States >50K
32558 58 Private 151910 HS-grad 9 Widowed Adm-clerical Unmarried White Female 0 0 40 United-States <=50K
32559 22 Private 201490 HS-grad 9 Never-married Adm-clerical Own-child White Male 0 0 20 United-States <=50K
32560 52 Self-emp-inc 287927 HS-grad 9 Married-civ-spouse Exec-managerial Wife White Female 15024 0 40 United-States >50K

30162 rows × 15 columns

In [12]:
test.replace(' ?',np.nan,inplace=True)
test=test.dropna()
test
Out[12]:
age workclass education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country income
0 25 Private 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K
1 38 Private HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K
2 28 Local-gov Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K
3 44 Private Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K
5 34 Private 10th 6 Never-married Other-service Not-in-family White Male 0 0 30 United-States <=50K
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16275 33 Private Bachelors 13 Never-married Prof-specialty Own-child White Male 0 0 40 United-States <=50K
16276 39 Private Bachelors 13 Divorced Prof-specialty Not-in-family White Female 0 0 36 United-States <=50K
16278 38 Private Bachelors 13 Married-civ-spouse Prof-specialty Husband White Male 0 0 50 United-States <=50K
16279 44 Private Bachelors 13 Divorced Adm-clerical Own-child Asian-Pac-Islander Male 5455 0 40 United-States <=50K
16280 35 Self-emp-inc Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 60 United-States >50K

15060 rows × 14 columns

In [13]:
#  Xóa cột final weight ‘fnlwgt’ trong dữ liệu huấn luyện vì cột này không có trong dữ liệu kiểm thử
In [14]:
del train["fnlwgt"]
In [15]:
train
Out[15]:
age workclass education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country income
0 39 State-gov Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
32556 27 Private Assoc-acdm 12 Married-civ-spouse Tech-support Wife White Female 0 0 38 United-States <=50K
32557 40 Private HS-grad 9 Married-civ-spouse Machine-op-inspct Husband White Male 0 0 40 United-States >50K
32558 58 Private HS-grad 9 Widowed Adm-clerical Unmarried White Female 0 0 40 United-States <=50K
32559 22 Private HS-grad 9 Never-married Adm-clerical Own-child White Male 0 0 20 United-States <=50K
32560 52 Self-emp-inc HS-grad 9 Married-civ-spouse Exec-managerial Wife White Female 15024 0 40 United-States >50K

30162 rows × 14 columns

In [16]:
# Nối dữ liệu huấn luyện và kiểm thử lại với nhau để phục vụ các bước tiếp theo
In [17]:
df=pd.concat([train,test])
In [18]:
# Kiểm tra
In [19]:
df
Out[19]:
age workclass education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country income
0 39 State-gov Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16275 33 Private Bachelors 13 Never-married Prof-specialty Own-child White Male 0 0 40 United-States <=50K
16276 39 Private Bachelors 13 Divorced Prof-specialty Not-in-family White Female 0 0 36 United-States <=50K
16278 38 Private Bachelors 13 Married-civ-spouse Prof-specialty Husband White Male 0 0 50 United-States <=50K
16279 44 Private Bachelors 13 Divorced Adm-clerical Own-child Asian-Pac-Islander Male 5455 0 40 United-States <=50K
16280 35 Self-emp-inc Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 60 United-States >50K

45222 rows × 14 columns

In [20]:
print('Number of training data: ', len(train))
print('Number of training data: ', len(test))
Number of training data:  30162
Number of training data:  15060
In [21]:
df.info
Out[21]:
<bound method DataFrame.info of        age          workclass   education  education-num       marital-status  \
0       39          State-gov   Bachelors             13        Never-married   
1       50   Self-emp-not-inc   Bachelors             13   Married-civ-spouse   
2       38            Private     HS-grad              9             Divorced   
3       53            Private        11th              7   Married-civ-spouse   
4       28            Private   Bachelors             13   Married-civ-spouse   
...    ...                ...         ...            ...                  ...   
16275   33            Private   Bachelors             13        Never-married   
16276   39            Private   Bachelors             13             Divorced   
16278   38            Private   Bachelors             13   Married-civ-spouse   
16279   44            Private   Bachelors             13             Divorced   
16280   35       Self-emp-inc   Bachelors             13   Married-civ-spouse   

               occupation    relationship                 race      sex  \
0            Adm-clerical   Not-in-family                White     Male   
1         Exec-managerial         Husband                White     Male   
2       Handlers-cleaners   Not-in-family                White     Male   
3       Handlers-cleaners         Husband                Black     Male   
4          Prof-specialty            Wife                Black   Female   
...                   ...             ...                  ...      ...   
16275      Prof-specialty       Own-child                White     Male   
16276      Prof-specialty   Not-in-family                White   Female   
16278      Prof-specialty         Husband                White     Male   
16279        Adm-clerical       Own-child   Asian-Pac-Islander     Male   
16280     Exec-managerial         Husband                White     Male   

       capital-gain  capital-loss  hours-per-week  native-country  income  
0              2174             0              40   United-States   <=50K  
1                 0             0              13   United-States   <=50K  
2                 0             0              40   United-States   <=50K  
3                 0             0              40   United-States   <=50K  
4                 0             0              40            Cuba   <=50K  
...             ...           ...             ...             ...     ...  
16275             0             0              40   United-States   <=50K  
16276             0             0              36   United-States   <=50K  
16278             0             0              50   United-States   <=50K  
16279          5455             0              40   United-States   <=50K  
16280             0             0              60   United-States    >50K  

[45222 rows x 14 columns]>
In [22]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 45222 entries, 0 to 16280
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             45222 non-null  int64 
 1   workclass       45222 non-null  object
 2   education       45222 non-null  object
 3   education-num   45222 non-null  int64 
 4   marital-status  45222 non-null  object
 5   occupation      45222 non-null  object
 6   relationship    45222 non-null  object
 7   race            45222 non-null  object
 8   sex             45222 non-null  object
 9   capital-gain    45222 non-null  int64 
 10  capital-loss    45222 non-null  int64 
 11  hours-per-week  45222 non-null  int64 
 12  native-country  45222 non-null  object
 13  income          45222 non-null  object
dtypes: int64(5), object(9)
memory usage: 5.2+ MB
In [23]:
# Khảo sát độ tương đồng giữa các cột với nhau bằng công thức Pearson. Những cột nào có giá trị tương đồng cao thì hãy loại bỏ
In [24]:
pip install seaborn --upgrade
Requirement already satisfied: seaborn in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from seaborn) (1.26.0)
Requirement already satisfied: pandas>=1.2 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from seaborn) (2.1.1)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from seaborn) (3.8.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (23.1)
Requirement already satisfied: pillow>=6.2.0 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from pandas>=1.2->seaborn) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from pandas>=1.2->seaborn) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\tento\anaconda3\envs\thktdl\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [25]:
# pip install seaborn --upgrade
df2 = df.select_dtypes(include=['int64'])
plt.figure(figsize=(16,9))
sns.heatmap(df2.corr(method='pearson'),annot=True)
Out[25]:
<Axes: >
No description has been provided for this image
In [27]:
# Tách các cột dữ liệu thành hai phần, một phần chứa các thuộc tính bình thường, một phần chứa riêng thuộc tính quyết định
In [28]:
feature = df.drop('income',axis=1)
label = df['income']
In [33]:
# Chuyển đổi các cột không phải dạng số về dạng one-hot vector để phù hợp với đầu vào của thư viện
In [29]:
feature.select_dtypes(exclude=['int64']).columns
Out[29]:
Index(['workclass', 'education', 'marital-status', 'occupation',
       'relationship', 'race', 'sex', 'native-country'],
      dtype='object')
In [30]:
feature_onehot = pd.get_dummies(feature,columns=feature.select_dtypes(exclude=['int64']).columns)
feature_onehot
Out[30]:
age education-num capital-gain capital-loss hours-per-week workclass_ Federal-gov workclass_ Local-gov workclass_ Private workclass_ Self-emp-inc workclass_ Self-emp-not-inc ... native-country_ Portugal native-country_ Puerto-Rico native-country_ Scotland native-country_ South native-country_ Taiwan native-country_ Thailand native-country_ Trinadad&Tobago native-country_ United-States native-country_ Vietnam native-country_ Yugoslavia
0 39 13 2174 0 40 False False False False False ... False False False False False False False True False False
1 50 13 0 0 13 False False False False True ... False False False False False False False True False False
2 38 9 0 0 40 False False True False False ... False False False False False False False True False False
3 53 7 0 0 40 False False True False False ... False False False False False False False True False False
4 28 13 0 0 40 False False True False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16275 33 13 0 0 40 False False True False False ... False False False False False False False True False False
16276 39 13 0 0 36 False False True False False ... False False False False False False False True False False
16278 38 13 0 0 50 False False True False False ... False False False False False False False True False False
16279 44 13 5455 0 40 False False True False False ... False False False False False False False True False False
16280 35 13 0 0 60 False False False True False ... False False False False False False False True False False

45222 rows × 103 columns

In [34]:
# Tách các dòng dữ liệu ra thành hai phần huấn luyện và kiểm thử như ban đầu
In [31]:
x_train = feature_onehot[:30162]
x_test = feature_onehot[30162:]
y_train=label[:30162]
y_test=label[30162:]
In [ ]:
# Xây dựng cây ID3 dựa trên dữ liệu huấn luyện và sau đó tiến hành kiểm thử kết quả của cây bằng ma trận nhầm lẫn. Biểu diễn cây vào trong kết quả thực hiện
In [35]:
clf = tree.DecisionTreeClassifier(criterion="entropy",random_state=0)
clf.fit(x_train,y_train)
Out[35]:
DecisionTreeClassifier(criterion='entropy', random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(criterion='entropy', random_state=0)
In [ ]:
# Sau khi xây dựng xong cây ID3, tiến hành áp dụng mô hình trên dữ liệu kiểm thử
In [36]:
tree_pred = clf.predict(x_test)

tree_score = metrics.accuracy_score(y_test,tree_pred)
print("Accruracy:",tree_score)
print("Report:",metrics.classification_report(y_test,tree_pred))
Accruracy: 0.8175298804780876
Report:               precision    recall  f1-score   support

       <=50K       0.88      0.88      0.88     11360
        >50K       0.63      0.62      0.62      3700

    accuracy                           0.82     15060
   macro avg       0.75      0.75      0.75     15060
weighted avg       0.82      0.82      0.82     15060

In [ ]:
# tree_cm = metrics.confusion_matrix(y_test,tree_pred)
In [37]:
tree_cm = metrics.confusion_matrix(y_test,tree_pred)
In [ ]:
# Biểu diễn lên đồ thị heatmap
In [39]:
plt.figure(figsize=(12,12))
sns.heatmap(tree_cm,annot=True, fmt=".3f",linewidth=.5,square=True,cmap='Blues_r');
plt.xlabel('Actual Label');
plt.ylabel('Predicted Label');
title ='Decision Tree Accuracy Score: {0}'.format(tree_score)
plt.title(title,size=15);
No description has been provided for this image
In [40]:
# Biểu diễn cây ID3
In [41]:
fig, ax = plt.subplots(figsize=(50,24))
tree.plot_tree(clf,filled=True,fontsize=10)
plt.savefig('decision_tree',dpi=100)
plt.show()
No description has been provided for this image
In [ ]:
#  thay thế giá trị criterion=’gini’
In [42]:
clf = tree.DecisionTreeClassifier(criterion="gini",random_state=0)
clf.fit(x_train,y_train)
Out[42]:
DecisionTreeClassifier(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(random_state=0)
In [43]:
tree_pred = clf.predict(x_test)
tree_score = metrics.accuracy_score(y_test,tree_pred)
print("Accruracy:",tree_score)
print("Report:",metrics.classification_report(y_test,tree_pred))
Accruracy: 0.8122841965471448
Report:               precision    recall  f1-score   support

       <=50K       0.87      0.88      0.88     11360
        >50K       0.62      0.60      0.61      3700

    accuracy                           0.81     15060
   macro avg       0.75      0.74      0.74     15060
weighted avg       0.81      0.81      0.81     15060

In [44]:
tree_cm = metrics.confusion_matrix(y_test,tree_pred)
In [45]:
plt.figure(figsize=(12,12))
sns.heatmap(tree_cm,annot=True, fmt=".3f",linewidth=.5,square=True,cmap='Blues_r')
plt.xlabel('Actual Label')
plt.ylabel('Predicted Label')
title ='Decision Tree Accuracy Score: {0}'.format(tree_score)
plt.title(title,size=15)
Out[45]:
Text(0.5, 1.0, 'Decision Tree Accuracy Score: 0.8122841965471448')
No description has been provided for this image
In [46]:
fig, ax = plt.subplots(figsize=(50,24))
tree.plot_tree(clf,filled=True,fontsize=10)
plt.savefig('decision_tree_gini',dpi=100)
plt.show()
No description has been provided for this image
In [ ]:
# Xây dựng mô hình phân lớp bằng thuật toán Naïve Bayes và kiểm tra kết quả đạt được
In [47]:
gnb = GaussianNB()
In [48]:
bayes_pred = gnb.fit(x_train, y_train).predict(x_test)
In [49]:
bayes_score = metrics.accuracy_score(y_test, bayes_pred)
In [50]:
print("Accuracy: ", bayes_score)
print("Report: ", metrics.classification_report(y_test, bayes_pred))
Accuracy:  0.8029216467463479
Report:                precision    recall  f1-score   support

       <=50K       0.93      0.80      0.86     11360
        >50K       0.57      0.82      0.67      3700

    accuracy                           0.80     15060
   macro avg       0.75      0.81      0.76     15060
weighted avg       0.84      0.80      0.81     15060

In [51]:
bayes_cm = metrics.confusion_matrix(y_test, bayes_pred)
plt.figure(figsize=(12,12))
sns.heatmap(bayes_cm,annot=True, fmt=".3f",linewidth=.5,square=True,cmap='Greens');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
title = 'Native Bayes Accuracy Score: {0}'.format(bayes_score)
plt.title(title, size=15);
No description has been provided for this image
In [53]:
# 10. SO SÁNH KẾT QUẢ ĐẠT ĐƯỢC CỦA CÁC MÔ HÌNH TRÊN
# Dựa vào mô hình ta có độ chính xác của các thuật toán lần lượt là:
# Thuật toán cây ID3 với 81.753%
# Thuật toán Naive Bayes với 80.292%
# Thuật toán cây CART với 81.228%
# Vậy đối với mô hình này sử dụng thuật toán cây quyết định ID3 cho ra độ chính xác cao nhất

# Còn nếu xét trên từng lớp

# Với số lớp <= 50k ID3: 0.88, CART: 0.88, Naive: 0.86 Cả ba thuật toán cho ra độ chính xác khá cao
# Với số lớp > 50k ID3: 0.62, CART: 0.61, Naive: 0.67 Cả ba thuật toán vẫn chưa cho ra được sự chính xác caoược sự chí

# Kết quả này cho thấy rằng trong trường hợp này, dù ID3 có độ chính xác cao nhất trên toàn bộ tập dữ liệu, nhưng nó vẫn không thể hiện sự ổn định và hiệu suất tốt khi xử lý các lớp dữ liệu lớn. Việc sử dụng Naive Bayes có thể là lựa chọn hợp lý hơn trong trường hợp này, với độ chính xác tương đối ổn định trên các lớp dữ liệu khác nhau. Tuy nhiên, việc chọn thuật toán phụ thuộc vào nhiều yếu tố khác nhau như cấu trúc dữ liệu, kích thước tập dữ liệu, và mục tiêu cụ thể của bài toán.nh xác cao
In [ ]: